import json
import time
import urllib
import urllib.parse

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

# Job-search JSON endpoint and the query parameters sent to it.
xhr_url = "https://www.zhipin.com/wapi/zpgeek/search/joblist.json"
params = {
    "scene": "1",
    "query": "java",
    "city": "101040100",
    "experience": "",
    "payType": "",
    "partTime": "",
    "degree": "",
    "industry": "",
    "scale": "",
    "stage": "",
    "position": "",
    "jobType": "",
    "salary": "",
    "multiBusinessDistrict": "",
    "multiSubway": "",
    "page": "1",
    "pageSize": "30",
}

# Set up the Chrome driver.
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service)

try:
    # Open the search results page first.
    # NOTE(review): presumably this is what lets the XHR below run same-origin
    # with the site's session cookies -- confirm.
    driver.get("https://www.zhipin.com/web/geek/job?query=java&city=101040100&page=1")

    # Give the page time to finish loading.
    time.sleep(5)

    # Issue a synchronous GET from inside the page and hand the raw response
    # text back to Python. The URL is passed as a script argument instead of
    # being spliced into the JavaScript source, so no quoting is needed.
    # User-Agent and Referer are forbidden request header names: browsers
    # ignore setRequestHeader() for them, so they are not set here.
    response = driver.execute_script(
        """
        var xhr = new XMLHttpRequest();
        xhr.open('GET', arguments[0], false);
        xhr.send(null);
        return xhr.responseText;
        """,
        f"{xhr_url}?{urllib.parse.urlencode(params)}",
    )
finally:
    # Always close the browser, even when navigation or the request fails.
    driver.quit()

# Raises json.JSONDecodeError if the endpoint did not answer with JSON.
data = json.loads(response)
print(data)

# Write the parsed response to a file.
with open('../job_data.json', 'w', encoding='utf-8') as f:
    json.dump(data, f, ensure_ascii=False, indent=4)

print("数据已保存到 job_data.json 文件中")




import csv
import json

# Read the JSON file.
with open('../job_data.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Extract the jobList entries. Indexing is deliberate: a KeyError here means
# the saved response does not contain a 'zpData' -> 'jobList' result.
job_list = data['zpData']['jobList']

# Columns of the CSV file, in output order.
fieldnames = [
    'jobName', 'salaryDesc', 'jobExperience', 'jobDegree', 'cityName',
    'areaDistrict', 'businessDistrict', 'brandName', 'brandIndustry',
    'brandScaleName', 'welfareList', 'skills'
]

# Fields whose value is a list and must be flattened into a single CSV cell.
list_fields = ('welfareList', 'skills')

# Write the CSV file using utf-8-sig, which prefixes the output with a BOM.
with open('../job_list.csv', 'w', newline='', encoding='utf-8-sig') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    # Header row.
    writer.writeheader()

    # One row per job.
    for job in job_list:
        # Build a fresh row rather than overwriting the loaded job dict.
        row = {field: job.get(field, '') for field in fieldnames}

        # Join list values into a comma-separated string. A missing or null
        # list becomes an empty cell instead of raising KeyError/TypeError,
        # and non-string items are converted with str().
        for field in list_fields:
            row[field] = ', '.join(map(str, job.get(field) or []))

        writer.writerow(row)

print("数据已保存到 job_list.csv 文件中")

